########## This script presents robustness checks by using a different operationalization of the treatment variable (i.e. length of basic school).
########## Treatment assignment is based on self-reported years of basic school attendance and birth cohort.
########## This script creates table in the appendix: Table A4. Robustness checks: Ordered logit models of attitude toward the length of basic education.
########## However, the script enables creation of all outputs in the main article using this operationalization of the treatment.

# Setting the working directory to the "Replication" folder (the folder stores the data set used in the article)
setwd("C:/Users/ivan.petrusek/Desktop/Replication")

# Reading the merged Czech Attitude Barometer data set into R (wave 1 and wave 2)
library(haven)
cab <- read_sav("CAB_waves_1_and_2.sav", user_na = FALSE)

###### The alternative independent variable (treatment condition) is based on generations defined by the education system AND self-reported answers
### Creating the generation-based treatment variable
# 0 = 8 year basic school = (year_birth >= 1939 AND <= 1946) OR (year_birth >= 1966 AND year_birth <= 1982)
# 1 = 9 year basic school = (year_birth < 1939) OR (year_birth > 1946 AND year_birth < 1966) OR year_birth > 1982
cab_listwise$gen_9 <- 0

# Assign 1 based on your second set of conditions
cab_listwise$gen_9[cab_listwise$year_birth < 1939 | 
                     (cab_listwise$year_birth > 1946 & cab_listwise$year_birth < 1966) | 
                     cab_listwise$year_birth > 1982] <- 1

table(cab_listwise$gen_9)
table(cab_listwise$devitka_excl)

# Crosstabulating two different operationalizations of the key independent variable
# 90 % of all respondents are classified in the same group in both operationalizations
cross_table <- table(cab_listwise$devitka_excl, cab_listwise$gen_9)
sum(diag(cross_table)) / sum(cross_table)

##### !!!!! Creating an alternative definition of the key independent variable (i.e. the treatment)
### Based on generations defined by the education system AND self-reported answers
# only respondents with the same treatment assignment based on both methods are kept 
cab_listwise$gen_9_self <- NA
cab_listwise$gen_9_self[cab_listwise$gen_9 == 0 & cab_listwise$devitka_excl == 0] <- 0
cab_listwise$gen_9_self[cab_listwise$gen_9 == 1 & cab_listwise$devitka_excl == 1] <- 1


# Excluding respondents who contain at least one missing value (on the variables employed in our analyses)
cab_listwise_gen_9_self <- cab_listwise[complete.cases(cab_listwise[, c("w1_vzdel_2", "gen_9_self", "years_since", "high_interest",
                                           "no_kids", "has_kids", "w1_left_right",
                                           "female", "university", "interakce")]), ]
# This results in 1207 respondents in total
nrow(cab_listwise_gen_9_self)


##### Table 2 (under the alternative operationalization of the treatment).
raw_table <- table(cab_listwise$gen_9_self, cab_listwise$w1_vzdel_2)
row_perc <- round(prop.table(raw_table, margin = 1) * 100, 2)
# Row marginal counts
row_ns <- rowSums(raw_table)
total_n <- sum(raw_table)

# Column marginal percentages
prop.table(table(cab_listwise$w1_vzdel_2)) * 100

# Chi-square test of independence
chisq_test <- chisq.test(raw_table)
print(chisq_test)

# Cramér's V
phi <- sqrt(chisq_test$statistic / sum(raw_table))
v_value <- phi / sqrt(min(dim(raw_table)) - 1)
print(v_value)


##### Figure 1 (under the alternative operationalization of the treatment).
### Preparing the data: calculate means and 3-year moving average
# Aggregating mean of attitudes by year of birth
agg_data <- aggregate(w1_vzdel_2 ~ year_birth, 
                      data = cab_listwise_gen_9_self, 
                      FUN = mean, na.rm = TRUE)

# Calculating 3-year moving average using stats::filter
ma <- stats::filter(agg_data$w1_vzdel_2, rep(1/3, 3), sides = 2)

### Setting up the plotting area and othe figure parameters
par(mar = c(4, 4, 0.5, 1) + 0.1)
# Creating the empty plot first to define coordinates
plot(x = agg_data$year_birth, y = ma, 
     type = "n", 
     ylim = c(1, 5), xlim = c(1938, 2008),
     xaxt = "n", yaxt = "n", 
     xlab = "Year of Birth", 
     ylab = "Average Agreement with Ninth Grade Abolition", 
     bty = "n", main = "")

# Adding background horizontal grid
grid(nx = NA, ny = NULL, col = "lightgray", lty = "solid")

# Adding vertical era markers
era_lines <- c(1938.5, 1946.5, 1965.5, 1982.5)
abline(v = era_lines, col = "salmon", lwd = 2.5)


### Adding the main line and points
lines(agg_data$year_birth, ma, col = "gray25", lwd = 3)
points(agg_data$year_birth, ma, col = "gray25", pch = 19, cex = 1.2)

### Adding the axes
# Y-axis
y_at <- seq(1, 5, 0.5)
axis(side = 2, at = y_at, 
     labels = y_at, 
     las = 1, 
     tick = TRUE, 
     col = "lightgray")

# X-axis with
axis(side = 1, at = 1938:2008, 
     labels = 1938:2008, 
     las = 2, 
     cex.axis = 0.75, 
     tick = TRUE)

### Adding era labels
text(x = 1942.5, y = 1.5, labels = "eight years", font = 2, col = "salmon")
text(x = 1956.0, y = 1.5, labels = "nine years", font = 2, col = "salmon")
text(x = 1974.0, y = 1.5, labels = "eight years", font = 2, col = "salmon")
text(x = 1995.0, y = 1.5, labels = "nine years", font = 2, col = "salmon")



########## OLS Linear Regression Models
# Model 1 - only the key explanatory variable
model_1 <- lm(w1_vzdel_2 ~ gen_9_self, 
              data = cab_listwise_gen_9_self)

# Model 2 - adding all control variables
model_2 <- lm(w1_vzdel_2 ~ gen_9_self + years_since + high_interest + 
                no_kids + has_kids + w1_left_right + female + university, 
              data = cab_listwise_gen_9_self)

# Model 3 - all predictors + interaction term between personal experience and high political interest
model_3 <- lm(w1_vzdel_2 ~ gen_9_self + years_since + high_interest + 
                no_kids + has_kids + w1_left_right + female + university + 
                devitka_zajem, 
              data = cab_listwise_gen_9_self)

# Model 4 - all predictors + interaction term between personal experience and the number of years since compelting basic school
model_4 <- lm(w1_vzdel_2 ~ gen_9_self + years_since + high_interest + 
                no_kids + has_kids + w1_left_right + female + university + 
                interakce, 
              data = cab_listwise_gen_9_self)

# Exporting the model results (Table A3 under the alternative operationalization of the treatment)
library(sjPlot)
tab_model(model_1, model_2, model_3, model_4, 
          digits = 3, digits.p = 4,
          show.se = TRUE, show.ci = FALSE, 
          show.r2 = TRUE,
          string.est = "Estimate", string.se = "SE", 
          p.threshold = c(0.05, 0.01, 0.001),
          p.style = "stars")



########## Ordered Logit Models
library(MASS)

# Converting the dependent variable to aa ordered factor
cab_listwise_gen_9_self$w1_vzdel_2_ord <- factor(cab_listwise_gen_9_self$w1_vzdel_2, 
                                      levels = 1:5, 
                                      labels = c("Strongly agree", "Somewhat agree", "Neither agree nor disagree",
                                                 "Somewhat disagree", "Strongly disagree"),
                                      ordered = TRUE)

# Model 1 - only the key explanatory variable
model_1_ord <- polr(w1_vzdel_2_ord ~ gen_9_self, 
                    data = cab_listwise_gen_9_self, 
                    Hess = TRUE)

# Model 2 - adding all control variables
model_2_ord <- polr(w1_vzdel_2_ord ~ gen_9_self + years_since + high_interest + 
                      no_kids + has_kids + w1_left_right + female + university, 
                    data = cab_listwise_gen_9_self, 
                    Hess = TRUE)

# Model 3 - all predictors + interaction term between personal experience and high political interest
model_3_ord <- polr(w1_vzdel_2_ord ~ gen_9_self + years_since + high_interest + 
                      no_kids + has_kids + w1_left_right + female + university + 
                      devitka_zajem, 
                    data = cab_listwise_gen_9_self, 
                    Hess = TRUE)

# Model 4 - all predictors + interaction term between personal experience and the number of years since compelting basic school
model_4_ord <- polr(w1_vzdel_2_ord ~ gen_9_self + years_since + high_interest + 
                      no_kids + has_kids + w1_left_right + female + university + 
                      interakce, 
                    data = cab_listwise_gen_9_self, 
                    Hess = TRUE)

##### Exporting the model results
##### Table A4. Robustness checks: Ordered logit models of attitude toward the length of basic education.
tab_model(model_1_ord, model_2_ord, model_3_ord, model_4_ord, 
          digits = 3, digits.p = 4,
          show.se = TRUE, show.ci = FALSE, 
          show.aic = TRUE,
          show.dev = TRUE,
          string.est = "Estimate", string.se = "SE", 
          p.threshold = c(0.05, 0.01, 0.001),
          p.style = "stars")




##### Plots for predicted probabilities
library(ggplot2)
library(MASS)

##### Figure 2 (under the alternative operationalization of the treatment).
### Creating a prediction data frame
# The key independent variable (i.e. the treatment) in the only variable that varies (0 and 1)
# Years since completing basic school is held at its mean.
# High interest, has kid(s), moderate, female with university degree"
pred_data <- data.frame(
  gen_9_self  = c(0, 1),
  years_since   = mean(cab_listwise_gen_9_self$years_since),
  high_interest = 1,
  no_kids       = 0,
  has_kids      = 1,
  w1_left_right = 5,
  female        = 1,
  university    = 1)

# Getting predicted probabilities for each category of the dependent variable
pred_probs <- predict(model_2_ord, newdata = pred_data, type = "probs")

# Converting predicted probability to long format required for ggplot
pred_df <- data.frame(gen_9_self = factor(rep(c(0, 1), each = 5),
                                            labels = c("8 years at basic school", "9 years at basic school")),
                      category = factor(rep(1:5, times = 2)),
                      probability = c(pred_probs[1, ], pred_probs[2, ]))

### Bootstrap confidence intervals
library(boot)

boot_probs <- function(data, indices) {
  d <- data[indices, ]
  m <- polr(w1_vzdel_2_ord ~ gen_9_self + years_since + high_interest +
              no_kids + has_kids + w1_left_right + female + university,
            data = d, Hess = TRUE)
  as.vector(predict(m, newdata = pred_data, type = "probs"))
}

set.seed(123)
boot_out <- boot(data = cab_listwise_gen_9_self, statistic = boot_probs, R = 1000)

# Extracting 95% percentile CIs
# Bootstrap indices are interleaved: 1,3,5,7,9 = 8 years; 2,4,6,8,10 = 9 years
ci_lower <- numeric(10)
ci_upper <- numeric(10)

for (i in 1:10) {
  ci <- boot.ci(boot_out, type = "perc", index = i)
  ci_lower[i] <- ci$percent[4]
  ci_upper[i] <- ci$percent[5]
}

# Reordering to match pred_df row order (rows 1-5 = 8 years, rows 6-10 = 9 years)
idx_8yrs <- c(1, 3, 5, 7, 9)   # odd indices = 8 years at basic school
idx_9yrs <- c(2, 4, 6, 8, 10)  # even indices = 9 years at basic school

pred_df$ci_lower <- c(ci_lower[idx_8yrs], ci_lower[idx_9yrs])
pred_df$ci_upper <- c(ci_upper[idx_8yrs], ci_upper[idx_9yrs])

# Verifying the alignment and checking the predicted probabilities
print(pred_df)
sum(pred_df[pred_df$gen_9_self == "8 years at basic school", "probability"])
sum(pred_df[pred_df$gen_9_self == "9 years at basic school", "probability"])

### Creating the barplot with predicted probabilities and bootstrap CIs
ggplot(pred_df, aes(x = category, y = probability, fill = gen_9_self)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.7), 
           width = 0.6) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper),
                position = position_dodge(width = 0.7),
                width = 0.2,
                linewidth = 0.7,
                color = "gray30") +
  scale_fill_manual(values = c("steelblue1", "salmon"),
                    name = "Basic school experience:") +
  scale_y_continuous(labels = scales::number_format(accuracy = 0.1),
                     breaks = seq(0, 0.5, 0.1),
                     limits = c(0, 0.6)) +
  scale_x_discrete(labels = c("1 Strongly agree", "2", "3", "4", "5 Strongly disagree")) +
  labs(title = "Probability of Agreement with Ninth Grade Abolition",
       subtitle = "Years since completing basic school at its mean (37 years), high interest, has kid(s), centrist (5 on left-right scale), female, university degree",
       x = "Level of Agreement with Ninth Grade Abolition",
       y = "Predicted probability (based on Model 2)") +
  theme_minimal(base_size = 12) +
  theme(legend.position  = "top",
        plot.title.position = "plot",
        panel.grid.major.x = element_blank())


##### Figure A1 (under the alternative operationalization of the treatment)
# Create a prediction dataset:
# The key independent variable (i.e. the treatment) in the only variable that varies (0 and 1)
# Years since completing basic school held at 5 years.
# Low interest, has kid(s), moderate, female with university degree
pred_data2 <- data.frame(
  gen_9_self  = c(0, 1),
  years_since   = 5,
  high_interest = 0,
  no_kids       = 0,
  has_kids      = 1,
  w1_left_right = 5,
  female        = 1,
  university    = 1)

# Getting predicted probabilities for each category of the dependent variable
pred_probs_2 <- predict(model_2_ord, newdata = pred_data2, type = "probs")

# Convertint predicted probability to long format required for ggplot
pred_df_2 <- data.frame(gen_9_self = factor(rep(c(0, 1), each = 5),
                                              labels = c("8 years at basic school", "9 years at basic school")), 
                        category = factor(rep(1:5, times = 2)), 
                        probability = c(pred_probs_2[1, ], pred_probs_2[2, ]))

### Bootstrap confidence intervals
library(boot)

boot_probs_2 <- function(data, indices) {
  d <- data[indices, ]
  m <- polr(w1_vzdel_2_ord ~ gen_9_self + years_since + high_interest +
              no_kids + has_kids + w1_left_right + female + university,
            data = d, Hess = TRUE)
  as.vector(predict(m, newdata = pred_data2, type = "probs"))
}

set.seed(234)
boot_out <- boot(data = cab_listwise_gen_9_self, statistic = boot_probs_2, R = 1000)

# Extracting 95% percentile CIs
# Bootstrap indices are interleaved: 1,3,5,7,9 = 8 years; 2,4,6,8,10 = 9 years
ci_lower_2 <- numeric(10)
ci_upper_2 <- numeric(10)

for (i in 1:10) {
  ci <- boot.ci(boot_out, type = "perc", index = i)
  ci_lower_2[i] <- ci$percent[4]
  ci_upper_2[i] <- ci$percent[5]
}

# Reordering to match pred_df row order (rows 1-5 = 8 years, rows 6-10 = 9 years)
idx_8yrs <- c(1, 3, 5, 7, 9)   # odd indices = 8 years at basic school
idx_9yrs <- c(2, 4, 6, 8, 10)  # even indices = 9 years at basic school

pred_df_2$ci_lower_2 <- c(ci_lower_2[idx_8yrs], ci_lower_2[idx_9yrs])
pred_df_2$ci_upper_2 <- c(ci_upper_2[idx_8yrs], ci_upper_2[idx_9yrs])

# Verifying the alignment and checking the predicted probabilities
print(pred_df_2)
sum(pred_df_2[pred_df_2$gen_9_self == "8 years at basic school", "probability"])
sum(pred_df_2[pred_df_2$gen_9_self == "9 years at basic school", "probability"])

### Creating the barplot with predicted probabilities and bootstrap CIs
ggplot(pred_df_2, aes(x = category, y = probability, fill = gen_9_self)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.7), 
           width = 0.6) +
  geom_errorbar(aes(ymin = ci_lower_2, ymax = ci_upper_2),
                position = position_dodge(width = 0.7),
                width = 0.2,
                linewidth = 0.7,
                color = "gray30") +
  scale_fill_manual(values = c("steelblue1", "salmon"),
                    name = "Basic school experience:") +
  scale_y_continuous(labels = scales::number_format(accuracy = 0.1),
                     breaks = seq(0, 0.5, 0.1),
                     limits = c(0, 0.5)) +
  scale_x_discrete(labels = c("1 Strongly agree", "2", "3", "4", "5 Strongly disagree")) +
  labs(title = "Probability of Agreement with Ninth Grade Abolition",
       subtitle = "Years since completing basic school fixed at 5 years, low/moderate interest, has kid(s), centrist (5 on left-right scale), female, university degree",
       x = "Level of Agreement with Ninth Grade Abolition",
       y = "Predicted probability (based on Model 2)") +
  theme_minimal(base_size = 12) +
  theme(legend.position  = "top",
        plot.title.position = "plot",
        panel.grid.major.x = element_blank())
